In [1]:
    
%matplotlib inline
import matplotlib.pyplot as plt
import seaborn as sn; sn.set()
import pandas as pd
import numpy as np
from sklearn.decomposition import PCA
from jupyterworkflow.data import get_freemont_data
    
In [2]:
    
# Load the dataset through the project helper and preview its first five rows.
data = get_freemont_data()
data.head(n=5)
    
    Out[2]:
In [3]:
    
# Aggregate the data into weekly sums, then plot every column.
weekly_totals = data.resample('W').sum()
weekly_totals.plot()
    
    Out[3]:
    
In [4]:
    
# Daily sums followed by a 365-row rolling sum, i.e. a trailing
# one-year total for each day.
daily_totals = data.resample('D').sum()
ax = daily_totals.rolling(365).sum().plot()
# Anchor the y-axis at zero; the upper limit is left as matplotlib chose it.
ax.set_ylim(bottom=0);
    
    
In [5]:
    
# Average each column by time of day across every date in the index.
mean_by_time = data.groupby(data.index.time).mean()
mean_by_time.plot();
    
    
In [6]:
    
# Reshape 'Total' into a grid: one row per time of day, one column per date.
pivoted = data.pivot_table(
    values='Total',
    index=data.index.time,
    columns=data.index.date,
)
# Peek at the top-left 5x5 corner of the grid.
pivoted.iloc[:5, :5]
    
    Out[6]:
In [7]:
    
# Overlay one line per date column; the very low alpha keeps individual
# lines faint so only heavily overlapping regions stand out.
pivoted.plot(alpha=0.01, legend=False);
    
    
In [8]:
    
# Replace missing entries with 0, then transpose so that each row of X is
# one date column of `pivoted` and each column is one time of day.
filled = pivoted.fillna(0)
X = filled.T.values
X.shape
    
    Out[8]:
In [9]:
    
# Project every row of X onto its first two principal components.
pca = PCA(n_components=2, svd_solver='full')
X2 = pca.fit_transform(X)
X2.shape
    
    Out[9]:
In [10]:
    
from sklearn.mixture import GaussianMixture

# Fit a two-component Gaussian mixture to the rows of X and assign each row
# to its most likely component.
# random_state is pinned because the fit is randomly initialised: without a
# seed, which cluster receives label 0 vs. 1 can differ between runs, and
# later cells select clusters by hard-coded label (labels == 0 / labels == 1).
gmm = GaussianMixture(2, random_state=0)
gmm.fit(X)
labels = gmm.predict(X)
labels
    
    Out[10]:
In [11]:
    
# pyplot is already imported in the first cell; the import is kept here so
# the cell still runs on its own.
import matplotlib.pyplot as plt
# Scatter the two PCA components against each other, coloured by mixture
# label. Both axes must come from X2 (the 2-D projection); the y-axis
# previously read X[:, 1], a raw column of the un-projected input.
plt.scatter(X2[:, 0], X2[:, 1], c=labels, cmap='rainbow')
plt.colorbar()
    
    Out[11]:
    
In [12]:
    
# One panel per mixture label: each panel overlays the date columns of
# `pivoted` assigned to that label.
fig, ax = plt.subplots(1, 2, figsize=(14, 6))
# Titles name the colours labels 0 and 1 receive in the scatter plots
# (presumably purple and red under the 'rainbow' colormap - confirm visually).
for cluster_id, title in enumerate(['Purple Cluster', 'Red Cluster']):
    # Transpose so the boolean label mask selects rows (dates), then
    # transpose back so time of day is on the x-axis again.
    pivoted.T[labels == cluster_id].T.plot(legend=False, alpha=0.1, ax=ax[cluster_id])
    ax[cluster_id].set_title(title)
    
    
In [13]:
    
# Day-of-week code for every date column of `pivoted`
# (pandas convention: Monday=0 ... Sunday=6).
column_dates = pd.DatetimeIndex(pivoted.columns)
dayofweek = column_dates.dayofweek
    
In [14]:
    
# Same PCA scatter, now coloured by day of week. Both axes must come from
# X2 (the 2-D projection); the y-axis previously read X[:, 1], a raw column
# of the un-projected input.
plt.scatter(X2[:, 0], X2[:, 1], c=dayofweek, cmap='rainbow')
plt.colorbar()
    
    Out[14]:
    
In [15]:
    
# List the dates that were assigned mixture label 1 even though they fall
# on Monday-Friday (dayofweek < 5).
dates = pd.DatetimeIndex(pivoted.columns)
in_cluster_one = labels == 1
is_weekday = dayofweek < 5
dates[in_cluster_one & is_weekday]
    
    Out[15]: